######################################
# SELECTION BIAS IN OBSERVED POLITIES
#
# Replication Material for:
# Continuity or Change? (In)direct Rule in British and French Colonial Africa
# 
# Carl Mueller-Crepon, 2020
# International Organization
#
# File Description:
# Analysis of selection of polities into sample
# The following analyses are contained in Appendix A1.1
#
# Called from indrule_analysis_all.R 
#
##################################

##########################################
# INIT
##########################################

# Libraries
library(lfe)
library(survival)
library(stargazer)
library(plyr)
library(ggplot2)

# Paths
fig.path = "figures/polities/"
tab.path = "tables/polities/"

# Load Data
load("data/data_polities.RData")

#########################
# FUNCTIONS
#########################

##### LaTeX Functionalities
source("scripts/functions/analysis_functions.R")


##############################################
# General Regression Setup ###################

# ... main dependent variable
dep.var <- "I(all.yrs > 0)"

# ... main explanatory variable
expl.var <- "v33.num"

# ... control variables
base.controls <- paste(c("v33.num","log(pop.dens.1880 + 1)", "log(dist.coast + 1)",
                         "log(river.dist)","log(area.sqkm)"), collapse = " + ")
ethnic.controls <- paste(c( paste0("v",c(4,5,28),".num")), collapse = " + ")
nature.controls <- paste(c("medianaltitude","medianslope","temperaturemean","evapotranspiration",  
                           "precipitation", "ppetratio", "suit", "max.cash.crop" ), collapse = " + ")
# ... fixed effects
fe <- "| 0"

# ... SE cluster
se.cluster <- "| 0 | 0"

# ... output specification for stargazer

# ... variables to keep and labels
keep.controls <- c("v33.num","pop.dens.1880", "dist.coast", "river.dist", "area.sqkm")
keep.labels <- c(paste("Precol. centralization"), "Population/km$^2$ (1880; log)", 
                 "Distance to coast (log)", "Distance to river (log)", "Area (km$^2$, log)")

# ... notes below tables
notes.p <- "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01."
latex.notes.add <- "Robust standard errors in parenthesis."
latex.notes.add <- function(width = .95){
  paste0("\\parbox[t]{",width,"\\textwidth}{\\textit{Notes:} OLS models. Robust standard errors in parenthesis.
         Nature controls consist of median altitude and slope, mean annual temperature, precipitation
         and evapotranspiration, the ratio of the two, agricultural suitability, and soils' suitability for cash crop production. 
         Ethnic controls consist of the reliance on agriculture and pastoralism, as well as the intensity of agricultural activities. 
         Significance codes: $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01}.")}

###########################
# Descriptive Plot
# Survival by level of pre-colonial centralization
# Appendix Figure A2
##############################################
stub <- "selec.descr.plot"

# Estimate  model with precol. centr. level dummies
m <- felm(as.formula(paste(dep.var, "~ 0 + factor(v33.num)")), 
          data = murdock.pol)

# Prepare plot data
pos <- which(grepl("v33.num",rownames(m$coefficients)))
plot.df <- data.frame(pos = 0:(length(pos)-1),
                      coef = m$coefficients[pos],
                      se = diag(m$vcv[pos,pos])^.5)
plot.df$lb = plot.df$coef - 1.96 * plot.df$se
plot.df$ub = plot.df$coef + 1.96 * plot.df$se


# ... plot
png(paste0(fig.path, stub,".png"), width = 5, height = 2.5, unit = "in", res = 1200)
g <- ggplot(plot.df, 
       aes(x = pos, y = coef)) + geom_point() + 
  geom_errorbar(aes(ymin = lb, 
                    ymax = ub), 
                width=.1) +
  geom_hline(yintercept = 0, color = "darkgrey", lty = 2) + 
  ylab("P(Any observed polity)") + xlab("Degree of precolonial centralization")  
print(g)
dev.off()


###########################################
# INFORMATION ON POLITIES
# Polities aggregated on Murdock polygonsas units of analysis
# Appendix Table A2

# Collapse polity.yr.df to 1 obs per polity, take last year for geographic info
info.df <- polity.yrs.df[(polity.yrs.df$col.brit == 1 | polity.yrs.df$col.frnc == 1) ,]
info.df <- join(aggregate.data.frame(list(year = info.df$year),
                                     info.df[,c("polity.id", "polity")], FUN = max),
                info.df, type = "left")



# Regress characters on french and british
stub <- "selec.nchar.frbr"

# ... full specification 
spec <- c(paste0(c("log(info.nchar+1)"), " ~ ", "colbrit + v33.num"," +", 
                 paste(c("log(popd_1880AD + 0.1) + log(dist.coast + 1) + log(river.dist)"), collapse = "+"),
                 fe, se.cluster),
          paste0(c("log(info.nchar+1)"), " ~ ", "colbrit + v33.num"," +", 
                 paste(c("log(popd_1880AD + 0.1) + log(dist.coast + 1) + log(river.dist)",nature.controls), collapse = "+"),
                 fe, se.cluster),
          paste0(c("log(info.nchar+1)"), " ~ ", "colbrit + v33.num"," +", 
                 paste(c("log(popd_1880AD + 0.1) + log(dist.coast + 1) + log(river.dist + 1)", ethnic.controls,nature.controls), collapse = "+"),
                 fe, se.cluster))


# Estimation
this.m <- lapply(spec, function(form.str){felm(as.formula(form.str), data = info.df)})

# Stargazer
these.keep <- c("colbrit",  keep.controls)
these.labels <- c("British colony", keep.labels)

# ... Info
f.stat <- latex.any.row("F-Stat:", round_any(unlist(lapply(this.m, function(m){summary(m)$F.fstat["F"]})), 0.01) )
mean.dv <- latex.any.row("Mean DV", round_any(unlist(lapply(this.m, function(m){mean(m$response)})), 0.01) )
add.lines <- list(latex.any.row("Nature controls: ",c("no","yes","yes")),
                  latex.any.row("Ethnic controls: ",c("no","no","yes")),
                  mean.dv, f.stat)

# Save
fileConn<-file(paste0(tab.path, stub, ".tex"))
writeLines(
  stargazer(this.m,
            title="Information per polity",
            dep.var.caption = "",dep.var.labels.include = FALSE,
            column.labels = c( "log(Characters of historical account)"),column.separate = c(3),
            keep= these.keep, covariate.labels = these.labels,
            notes.align = "l",label=paste0("tab.",stub),align =T,
            add.lines = add.lines, digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),
            notes = latex.notes.add(.81), notes.label = "", notes.append = F,
            type  = "latex"), 
  fileConn)
close(fileConn)

# Difference between French and British colonizers ######################################
# Polities aggregated on Murdock polygons
# Appendix Table A3

stub <- "selec.murdock.frbr"

# ... full specification 
spec <- paste0(c("I(all.yrs>0)","disc.yrs","uni.yrs","all.yrs"), " ~ ", "colbrit"," +", 
               paste(c(base.controls,ethnic.controls,nature.controls), collapse = "+"),
               " | 0 ", se.cluster)


# Estimation
this.m <- lapply(spec, function(form.str){felm(as.formula(form.str), 
                                               data = murdock.pol[murdock.pol$colbrit == 1 | murdock.pol$colfrnc == 1,])})

# Stargazer

# ... New labels
these.keep <- c("colbrit",  keep.controls)
these.labels <- c("British colony", keep.labels)

# ... Info
f.stat <- latex.any.row("F-Stat:", round_any(unlist(lapply(this.m, function(m){summary(m)$F.fstat["F"]})), 0.01) )
mean.dv <- latex.any.row("Mean DV", round_any(unlist(lapply(this.m, function(m){mean(m$response)})), 0.01) )
add.lines <- list(latex.any.row("Ethnic controls: ",rep("yes", length(this.m))),
                  latex.any.row("Nature controls: ",rep("yes", length(this.m))),
                  mean.dv, f.stat)

# Save
fileConn<-file(paste0(tab.path, stub, ".tex"))
writeLines(
  stargazer(this.m,
            title="Observed polity-history per ethnic group: Difference between French and British colonizers",
            dep.var.caption = "Years of precolonial data:",dep.var.labels.include = FALSE,
            column.labels = c( "P(any year)", "discounted","unique","all"),
            keep= these.keep, covariate.labels = these.labels,
            notes.align = "l",label=paste0("tab.",stub),align =T,
            add.lines = add.lines, digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width ="3pt",
            notes = latex.notes.add(), notes.label = "", notes.append = F,
            type  = "latex"), 
  fileConn)
close(fileConn)


#####################################################
# RASTER CELLS 
# Polities aggregated to raster cells
# Appendix Table A4

# Difference French and British Rule on Raster
stub <- "selec.raster.frbr"

# ... full specification 
spec <- paste0(c("I(all.yrs>0)","disc.yrs","uni.yrs","all.yrs"), " ~ ", "colbrit + v33.num"," +", 
               paste(c("log(pop.dens.1880 + 1) + log(dist.coast + 1) + log(river.dist + 1)", ethnic.controls,nature.controls), collapse = "+"),
               fe, se.cluster)


# Estimation
this.m <- lapply(spec, function(form.str){felm(as.formula(form.str), data = pol.rast.df[pol.rast.df$colbrit == 1 | pol.rast.df$colfrnc == 1,])})

# Stargazer
these.keep <- c("colbrit",  keep.controls[-length(keep.controls)]) # No area
these.labels <- c("British colony", keep.labels[-length(keep.labels)])

# ... Info
f.stat <- latex.any.row("F-Stat:", round_any(unlist(lapply(this.m, function(m){summary(m)$F.fstat["F"]})), 0.01) )
mean.dv <- latex.any.row("Mean DV", round_any(unlist(lapply(this.m, function(m){mean(m$response)})), 0.01) )
add.lines <- list(latex.any.row("Ethnic controls: ",rep("yes", length(this.m))),
                  latex.any.row("Nature controls: ",rep("yes", length(this.m))),
                  mean.dv, f.stat)

# Save
fileConn<-file(paste0(tab.path, stub, ".tex"))
writeLines(
  stargazer(this.m,
            title="Observed polity-history per raster cell",
            dep.var.caption = "Years of precolonial data:",dep.var.labels.include = FALSE,
            column.labels = c( "P(any year)", "discounted","unique","all"),
            keep= these.keep, covariate.labels = these.labels,
            notes.align = "l",label=paste0("tab.",stub),align =T,
            add.lines = add.lines, digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width ="3pt",
            notes =  latex.notes.add(), notes.label = "", notes.append = F,
            type  = "latex"), 
  fileConn)
close(fileConn)



